{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyspark.sql import SparkSession\n",
    "from pyspark.sql.types import *\n",
    "from pyspark.sql.functions import *"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyspark.ml.feature import StringIndexer\n",
    "from pyspark.sql.window import Window\n",
    "\n",
    "# scikit-learn\n",
    "from sklearn.decomposition import PCA\n",
    "from sklearn.preprocessing import StandardScaler, MinMaxScaler\n",
    "from sklearn.model_selection import train_test_split, GridSearchCV\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.tree import DecisionTreeClassifier\n",
    "from sklearn.dummy import DummyClassifier\n",
    "from statsmodels.api import Logit\n",
    "from sklearn.decomposition import PCA\n",
    "from sklearn import metrics\n",
    "from sklearn.metrics import classification_report\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.manifold import TSNE\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "\n",
    "# others\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import sys\n",
    "import itertools\n",
    "import re\n",
    "from random import sample\n",
    "import time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Install gensim for the gensim imports in the next cell.\n",
    "# %pip (unlike !pip) installs into the environment of the running kernel.\n",
    "%pip install gensim"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# gensim 4 removed the deprecated LabeledSentence alias; fall back to\n",
    "# TaggedDocument (the class it aliased) so the name keeps working.\n",
    "try:\n",
    "    from gensim.models.doc2vec import LabeledSentence\n",
    "except ImportError:\n",
    "    from gensim.models.doc2vec import TaggedDocument as LabeledSentence\n",
    "from gensim.models import Doc2Vec\n",
    "from gensim.models import Word2Vec"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Start (or reuse) the Spark session used by the rest of the notebook.\n",
    "spark = (\n",
    "    SparkSession.builder\n",
    "    .appName('seq_embedding')\n",
    "    .getOrCreate()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the raw events: the first row is the header, column types are inferred.\n",
    "df = spark.read.csv(\n",
    "    'embedding_dataset.csv',\n",
    "    header=True,\n",
    "    inferSchema=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1096955"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "root\n",
      " |-- user_id: string (nullable = true)\n",
      " |-- page: string (nullable = true)\n",
      " |-- timestamp: timestamp (nullable = true)\n",
      " |-- visit_number: integer (nullable = true)\n",
      " |-- time_spent: double (nullable = true)\n",
      " |-- converted: integer (nullable = true)\n",
      "\n"
     ]
    }
   ],
   "source": [
    "df.printSchema()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "104087"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.select('user_id').distinct().count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+-------------+------+\n",
      "|page         |count |\n",
      "+-------------+------+\n",
      "|product info |767131|\n",
      "|homepage     |142456|\n",
      "|added to cart|67087 |\n",
      "|others       |39919 |\n",
      "|offers       |32003 |\n",
      "|buy          |24916 |\n",
      "|reviews      |23443 |\n",
      "+-------------+------+\n",
      "\n"
     ]
    }
   ],
   "source": [
    "df.groupBy('page').count().orderBy('count',ascending=False).show(10,False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+----------------------------------------------------------------+-------------+------------+-----------+---------+\n",
      "|user_id                                                         |page         |visit_number|time_spent |converted|\n",
      "+----------------------------------------------------------------+-------------+------------+-----------+---------+\n",
      "|8057ed24427be18922f640b20b60997e7d070946b6c8f48117ae4d6dad0ebb23|homepage     |0           |0.16666667 |1        |\n",
      "|8057ed24427be18922f640b20b60997e7d070946b6c8f48117ae4d6dad0ebb23|product info |0           |0.4        |1        |\n",
      "|8057ed24427be18922f640b20b60997e7d070946b6c8f48117ae4d6dad0ebb23|product info |0           |0.31666666 |1        |\n",
      "|8057ed24427be18922f640b20b60997e7d070946b6c8f48117ae4d6dad0ebb23|product info |0           |0.6333333  |1        |\n",
      "|8057ed24427be18922f640b20b60997e7d070946b6c8f48117ae4d6dad0ebb23|product info |0           |0.15       |1        |\n",
      "|8057ed24427be18922f640b20b60997e7d070946b6c8f48117ae4d6dad0ebb23|homepage     |1           |0.8333333  |1        |\n",
      "|8057ed24427be18922f640b20b60997e7d070946b6c8f48117ae4d6dad0ebb23|product info |1           |0.16666667 |1        |\n",
      "|8057ed24427be18922f640b20b60997e7d070946b6c8f48117ae4d6dad0ebb23|product info |2           |0.16666667 |1        |\n",
      "|8057ed24427be18922f640b20b60997e7d070946b6c8f48117ae4d6dad0ebb23|buy          |2           |0.016666668|1        |\n",
      "|8057ed24427be18922f640b20b60997e7d070946b6c8f48117ae4d6dad0ebb23|added to cart|2           |0.41666666 |1        |\n",
      "+----------------------------------------------------------------+-------------+------------+-----------+---------+\n",
      "only showing top 10 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "df.select(['user_id','page','visit_number','time_spent','converted']).show(10,False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Per-user window with the events in chronological order.\n",
    "w = (\n",
    "    Window\n",
    "    .partitionBy('user_id')\n",
    "    .orderBy('timestamp')\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Page of the preceding event in each user's timeline; a user's first event\n",
    "# gets the placeholder 'started' instead of null.\n",
    "df = df.withColumn(\n",
    "    'previous_page',\n",
    "    lag('page', 1, 'started').over(w),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+----------------------------------------------------------------+-------------------+-------------+-------------+\n",
      "|user_id                                                         |timestamp          |previous_page|page         |\n",
      "+----------------------------------------------------------------+-------------------+-------------+-------------+\n",
      "|004e96d0dc01f2541b7e5be735da6321b15f797ded220d5f6fb9d66910b5ce88|2017-04-10 20:23:09|started      |product info |\n",
      "|004e96d0dc01f2541b7e5be735da6321b15f797ded220d5f6fb9d66910b5ce88|2017-04-10 20:26:23|product info |product info |\n",
      "|004e96d0dc01f2541b7e5be735da6321b15f797ded220d5f6fb9d66910b5ce88|2017-04-12 14:12:40|product info |product info |\n",
      "|004e96d0dc01f2541b7e5be735da6321b15f797ded220d5f6fb9d66910b5ce88|2017-04-12 20:49:33|product info |product info |\n",
      "|004e96d0dc01f2541b7e5be735da6321b15f797ded220d5f6fb9d66910b5ce88|2017-04-13 12:18:12|product info |product info |\n",
      "|01158797281955155c5c6bbe7daaa368021adcc4eaf4b3794e1789b5ee412a34|2018-02-21 23:47:13|started      |homepage     |\n",
      "|01158797281955155c5c6bbe7daaa368021adcc4eaf4b3794e1789b5ee412a34|2018-02-21 23:49:17|homepage     |homepage     |\n",
      "|01158797281955155c5c6bbe7daaa368021adcc4eaf4b3794e1789b5ee412a34|2018-02-22 00:07:58|homepage     |homepage     |\n",
      "|01158797281955155c5c6bbe7daaa368021adcc4eaf4b3794e1789b5ee412a34|2018-02-22 11:08:24|homepage     |homepage     |\n",
      "|01158797281955155c5c6bbe7daaa368021adcc4eaf4b3794e1789b5ee412a34|2018-02-22 11:08:32|homepage     |added to cart|\n",
      "+----------------------------------------------------------------+-------------------+-------------+-------------+\n",
      "only showing top 10 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "df.select('user_id','timestamp','previous_page','page').show(10,False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Indicator for a page change: compares the current page with the previous page.\n",
    "def indicator(page, prev_page):\n",
    "    \"\"\"Return 0 when page equals prev_page, otherwise 1.\"\"\"\n",
    "    return 0 if page == prev_page else 1\n",
    "\n",
    "# Spark UDF wrapper around indicator, returning an integer column.\n",
    "page_udf = udf(indicator, IntegerType())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "# indicator flags a page change; its running total over w gives every run of\n",
    "# consecutive identical pages its own number within the user.\n",
    "# sum is pyspark.sql.functions.sum (star import), not the Python builtin.\n",
    "# NOTE(review): w is ordered, so the running total uses Spark's default RANGE\n",
    "# frame - rows of one user that share a timestamp receive the same total.\n",
    "# Confirm timestamps are unique per user.\n",
    "df = (\n",
    "    df\n",
    "    .withColumn('indicator', page_udf(col('page'), col('previous_page')))\n",
    "    .withColumn('indicator_cummulative', sum(col('indicator')).over(w))\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+-------------+-------------+---------+---------------------+\n",
      "|previous_page|page         |indicator|indicator_cummulative|\n",
      "+-------------+-------------+---------+---------------------+\n",
      "|started      |product info |1        |1                    |\n",
      "|product info |product info |0        |1                    |\n",
      "|product info |product info |0        |1                    |\n",
      "|product info |product info |0        |1                    |\n",
      "|product info |product info |0        |1                    |\n",
      "|started      |homepage     |1        |1                    |\n",
      "|homepage     |homepage     |0        |1                    |\n",
      "|homepage     |homepage     |0        |1                    |\n",
      "|homepage     |homepage     |0        |1                    |\n",
      "|homepage     |added to cart|1        |2                    |\n",
      "|added to cart|homepage     |1        |3                    |\n",
      "|homepage     |added to cart|1        |4                    |\n",
      "|added to cart|homepage     |1        |5                    |\n",
      "|started      |homepage     |1        |1                    |\n",
      "|homepage     |product info |1        |2                    |\n",
      "|product info |product info |0        |2                    |\n",
      "|product info |product info |0        |2                    |\n",
      "|product info |product info |0        |2                    |\n",
      "|product info |product info |0        |2                    |\n",
      "|started      |product info |1        |1                    |\n",
      "+-------------+-------------+---------+---------------------+\n",
      "only showing top 20 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "df.select('previous_page','page','indicator','indicator_cummulative').show(20,False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Window over a single run of a page (user + cumulative indicator), oldest event first.\n",
    "w2 = (\n",
    "    Window\n",
    "    .partitionBy(['user_id', 'indicator_cummulative'])\n",
    "    .orderBy('timestamp')\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Running total of time_spent within each run of consecutive visits to the same page.\n",
    "df = df.withColumn(\n",
    "    'time_spent_cummulative',\n",
    "    sum(col('time_spent')).over(w2),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+-------------------+-------------+-------------+---------+---------------------+-----------+----------------------+\n",
      "|timestamp          |previous_page|page         |indicator|indicator_cummulative|time_spent |time_spent_cummulative|\n",
      "+-------------------+-------------+-------------+---------+---------------------+-----------+----------------------+\n",
      "|2017-04-10 20:23:09|started      |product info |1        |1                    |3.2333333  |3.2333333             |\n",
      "|2017-04-10 20:26:23|product info |product info |0        |1                    |0.08       |3.3133333             |\n",
      "|2017-04-12 14:12:40|product info |product info |0        |1                    |0.08       |3.3933333             |\n",
      "|2017-04-12 20:49:33|product info |product info |0        |1                    |0.08       |3.4733333             |\n",
      "|2017-04-13 12:18:12|product info |product info |0        |1                    |0.08       |3.5533333000000002    |\n",
      "|2018-02-21 23:47:13|started      |homepage     |1        |1                    |0.16666667 |0.16666667            |\n",
      "|2018-02-21 23:49:17|homepage     |homepage     |0        |1                    |0.06666667 |0.23333334            |\n",
      "|2018-02-22 00:07:58|homepage     |homepage     |0        |1                    |0.06666667 |0.30000001            |\n",
      "|2018-02-22 11:08:24|homepage     |homepage     |0        |1                    |0.13333334 |0.43333334999999995   |\n",
      "|2018-02-22 11:08:32|homepage     |added to cart|1        |2                    |0.11666667 |0.11666667            |\n",
      "|2018-02-22 11:10:08|added to cart|homepage     |1        |3                    |0.05       |0.05                  |\n",
      "|2018-02-22 11:10:11|homepage     |added to cart|1        |4                    |0.083333336|0.083333336           |\n",
      "|2018-02-22 12:31:58|added to cart|homepage     |1        |5                    |1.65       |1.65                  |\n",
      "|2017-12-09 21:35:03|started      |homepage     |1        |1                    |0.25       |0.25                  |\n",
      "|2017-12-09 21:35:18|homepage     |product info |1        |2                    |0.1        |0.1                   |\n",
      "|2017-12-09 21:36:14|product info |product info |0        |2                    |0.15       |0.25                  |\n",
      "|2017-12-09 21:36:23|product info |product info |0        |2                    |0.33333334 |0.58333334            |\n",
      "|2017-12-09 21:36:52|product info |product info |0        |2                    |0.23333333 |0.81666667            |\n",
      "|2017-12-09 21:42:31|product info |product info |0        |2                    |0.21666667 |1.03333334            |\n",
      "|2017-04-24 06:45:25|started      |product info |1        |1                    |0.15       |0.15                  |\n",
      "+-------------------+-------------+-------------+---------+---------------------+-----------+----------------------+\n",
      "only showing top 20 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "df.select('timestamp','previous_page','page','indicator','indicator_cummulative','time_spent','time_spent_cummulative').show(20,False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Window over a single run of a page, newest event first, so that first()\n",
    "# returns the values of the run's last event.\n",
    "w3 = (\n",
    "    Window\n",
    "    .partitionBy(['user_id', 'indicator_cummulative'])\n",
    "    .orderBy(col('timestamp').desc())\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Copy the page and the accumulated time spent of the run's last event\n",
    "# onto every row of that run.\n",
    "df = (\n",
    "    df\n",
    "    .withColumn('final_page', first('page').over(w3))\n",
    "    .withColumn('final_time_spent', first('time_spent_cummulative').over(w3))\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+----------------------+---------------------+-------------+-------------+-------------------+\n",
      "|time_spent_cummulative|indicator_cummulative|page         |final_page   |final_time_spent   |\n",
      "+----------------------+---------------------+-------------+-------------+-------------------+\n",
      "|3.5533333000000002    |1                    |product info |product info |3.5533333000000002 |\n",
      "|3.4733333             |1                    |product info |product info |3.5533333000000002 |\n",
      "|3.3933333             |1                    |product info |product info |3.5533333000000002 |\n",
      "|3.3133333             |1                    |product info |product info |3.5533333000000002 |\n",
      "|3.2333333             |1                    |product info |product info |3.5533333000000002 |\n",
      "|0.43333334999999995   |1                    |homepage     |homepage     |0.43333334999999995|\n",
      "|0.30000001            |1                    |homepage     |homepage     |0.43333334999999995|\n",
      "|0.23333334            |1                    |homepage     |homepage     |0.43333334999999995|\n",
      "|0.16666667            |1                    |homepage     |homepage     |0.43333334999999995|\n",
      "|0.11666667            |2                    |added to cart|added to cart|0.11666667         |\n",
      "+----------------------+---------------------+-------------+-------------+-------------------+\n",
      "only showing top 10 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "df.select(['time_spent_cummulative','indicator_cummulative','page','final_page','final_time_spent']).show(10,False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Aggregate expressions applied at user and page-run level.\n",
    "# max is pyspark.sql.functions.max (star import), not the Python builtin.\n",
    "aggregations = [\n",
    "    max(col('final_page')).alias('page_emb'),\n",
    "    max(col('final_time_spent')).alias('time_spent_emb'),\n",
    "    max(col('converted')).alias('converted_emb'),\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep the relevant columns and collapse each (user, page run) group to a\n",
    "# single row; this frame is the input for the journey sequences.\n",
    "df_embedding = (\n",
    "    df\n",
    "    .select(['user_id', 'indicator_cummulative', 'final_page', 'final_time_spent', 'converted'])\n",
    "    .groupBy(['user_id', 'indicator_cummulative'])\n",
    "    .agg(*aggregations)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "414770"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_embedding.count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+----------------------------------------------------------------+---------------------+-------------+-------------------+-------------+\n",
      "|user_id                                                         |indicator_cummulative|page_emb     |time_spent_emb     |converted_emb|\n",
      "+----------------------------------------------------------------+---------------------+-------------+-------------------+-------------+\n",
      "|004e96d0dc01f2541b7e5be735da6321b15f797ded220d5f6fb9d66910b5ce88|1                    |product info |3.5533333000000002 |0            |\n",
      "|01158797281955155c5c6bbe7daaa368021adcc4eaf4b3794e1789b5ee412a34|1                    |homepage     |0.43333334999999995|0            |\n",
      "|01158797281955155c5c6bbe7daaa368021adcc4eaf4b3794e1789b5ee412a34|2                    |added to cart|0.11666667         |0            |\n",
      "|01158797281955155c5c6bbe7daaa368021adcc4eaf4b3794e1789b5ee412a34|3                    |homepage     |0.05               |0            |\n",
      "|01158797281955155c5c6bbe7daaa368021adcc4eaf4b3794e1789b5ee412a34|4                    |added to cart|0.083333336        |0            |\n",
      "|01158797281955155c5c6bbe7daaa368021adcc4eaf4b3794e1789b5ee412a34|5                    |homepage     |1.65               |0            |\n",
      "|020d29467c7810a85cdf9c85f8bad4c551bc79f5aee959f41b92d75de968d81b|1                    |homepage     |0.25               |0            |\n",
      "|020d29467c7810a85cdf9c85f8bad4c551bc79f5aee959f41b92d75de968d81b|2                    |product info |1.03333334         |0            |\n",
      "|032d6e8c20f41c18ec32b4178099e5ec6648715cb43953734e9b2428d92e84ed|1                    |product info |2.329999975        |0            |\n",
      "|0377ebcf2ac8f8aef0f6dc28d61479f85a30d88203b9a46fd8dde56bee6384c5|1                    |product info |1.0699999999999998 |0            |\n",
      "|03f484c3d0dc5afaf2589fff76f127ebd9f0c878c0ce027fe22938459a9b1454|1                    |homepage     |0.6500000100000001 |0            |\n",
      "|03f484c3d0dc5afaf2589fff76f127ebd9f0c878c0ce027fe22938459a9b1454|2                    |product info |0.033333335        |0            |\n",
      "|03f484c3d0dc5afaf2589fff76f127ebd9f0c878c0ce027fe22938459a9b1454|3                    |homepage     |0.5                |0            |\n",
      "|03f484c3d0dc5afaf2589fff76f127ebd9f0c878c0ce027fe22938459a9b1454|4                    |product info |0.28               |0            |\n",
      "|040828e6773148d00d1cbee03c90363e9c66c7d95d21313b6db6a20e9e45d210|1                    |product info |1.430000006        |0            |\n",
      "|05bd9a73b6f61509e6e7e0fa2d3012efccc6103779a0816d46911a2f2ef55a3e|1                    |product info |14.79999967        |0            |\n",
      "|068ea915e886eb11c747ea6cb137b76c73379dc7b71ac18fa1459a4f49528bed|1                    |homepage     |0.5                |0            |\n",
      "|068ea915e886eb11c747ea6cb137b76c73379dc7b71ac18fa1459a4f49528bed|2                    |others       |0.5                |0            |\n",
      "|068ea915e886eb11c747ea6cb137b76c73379dc7b71ac18fa1459a4f49528bed|3                    |product info |0.979999986        |0            |\n",
      "|06a572f2d5e9d5c56cda08c2ab688854c9f4c8823f2b7f3fe27d0a68596a346e|1                    |product info |0.56               |0            |\n",
      "|06d70abf021b34477cb7ee3151d3a9de1086dd270a1be5cc3c3407bf03a6393a|1                    |homepage     |0.5                |0            |\n",
      "|06d70abf021b34477cb7ee3151d3a9de1086dd270a1be5cc3c3407bf03a6393a|2                    |product info |5.4666667          |0            |\n",
      "|06d70abf021b34477cb7ee3151d3a9de1086dd270a1be5cc3c3407bf03a6393a|3                    |homepage     |1.2333333          |0            |\n",
      "|06d70abf021b34477cb7ee3151d3a9de1086dd270a1be5cc3c3407bf03a6393a|4                    |product info |4.31666666         |0            |\n",
      "|06d70abf021b34477cb7ee3151d3a9de1086dd270a1be5cc3c3407bf03a6393a|5                    |homepage     |0.85               |0            |\n",
      "|06d70abf021b34477cb7ee3151d3a9de1086dd270a1be5cc3c3407bf03a6393a|6                    |reviews      |2.3                |0            |\n",
      "|06d70abf021b34477cb7ee3151d3a9de1086dd270a1be5cc3c3407bf03a6393a|7                    |product info |0.2                |0            |\n",
      "|06d70abf021b34477cb7ee3151d3a9de1086dd270a1be5cc3c3407bf03a6393a|8                    |reviews      |0.36333333         |0            |\n",
      "|06d70abf021b34477cb7ee3151d3a9de1086dd270a1be5cc3c3407bf03a6393a|9                    |homepage     |1.55               |0            |\n",
      "|06d70abf021b34477cb7ee3151d3a9de1086dd270a1be5cc3c3407bf03a6393a|10                   |product info |0.08               |0            |\n",
      "+----------------------------------------------------------------+---------------------+-------------+-------------------+-------------+\n",
      "only showing top 30 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "df_embedding.show(30, False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Per-user windows over the cumulative indicator:\n",
    "# w4 ascending (journey order), w5 descending (last run first).\n",
    "w4 = (\n",
    "    Window\n",
    "    .partitionBy(['user_id'])\n",
    "    .orderBy('indicator_cummulative')\n",
    ")\n",
    "w5 = (\n",
    "    Window\n",
    "    .partitionBy(['user_id'])\n",
    "    .orderBy(col('indicator_cummulative').desc())\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "# collect_list over w4 grows run by run, so only the row of the last run holds\n",
    "# the complete journey; first() over w5 copies that complete list to every row\n",
    "# of the user.\n",
    "df_embedding = (\n",
    "    df_embedding\n",
    "    .withColumn('journey_page', collect_list(col('page_emb')).over(w4))\n",
    "    .withColumn('journey_time_temp', collect_list(col('time_spent_emb')).over(w4))\n",
    "    .withColumn('journey_page_final', first('journey_page').over(w5))\n",
    "    .withColumn('journey_time_final', first('journey_time_temp').over(w5))\n",
    "    .select(['user_id', 'journey_page_final', 'journey_time_final', 'converted_emb'])\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+--------------------+--------------------+--------------------+\n",
      "|             user_id|  journey_page_final|  journey_time_final|\n",
      "+--------------------+--------------------+--------------------+\n",
      "|004e96d0dc01f2541...|      [product info]|[3.5533333000000002]|\n",
      "|01158797281955155...|[homepage, added ...|[0.43333334999999...|\n",
      "|01158797281955155...|[homepage, added ...|[0.43333334999999...|\n",
      "|01158797281955155...|[homepage, added ...|[0.43333334999999...|\n",
      "|01158797281955155...|[homepage, added ...|[0.43333334999999...|\n",
      "|01158797281955155...|[homepage, added ...|[0.43333334999999...|\n",
      "|020d29467c7810a85...|[homepage, produc...|  [0.25, 1.03333334]|\n",
      "|020d29467c7810a85...|[homepage, produc...|  [0.25, 1.03333334]|\n",
      "|032d6e8c20f41c18e...|      [product info]|       [2.329999975]|\n",
      "|0377ebcf2ac8f8aef...|      [product info]|[1.0699999999999998]|\n",
      "+--------------------+--------------------+--------------------+\n",
      "only showing top 10 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "df_embedding.select('user_id','journey_page_final','journey_time_final').show(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "414770"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_embedding.count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "104087"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_embedding.select('user_id').distinct().count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Remove rows that are identical in every column.\n",
    "df_embedding = df_embedding.distinct()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "104087"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_embedding.count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "104087"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_embedding.select('user_id').distinct().count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+--------------------+--------------------+--------------------+\n",
      "|             user_id|  journey_page_final|  journey_time_final|\n",
      "+--------------------+--------------------+--------------------+\n",
      "|004e96d0dc01f2541...|      [product info]|[3.5533333000000002]|\n",
      "|01158797281955155...|[homepage, added ...|[0.43333334999999...|\n",
      "|020d29467c7810a85...|[homepage, produc...|  [0.25, 1.03333334]|\n",
      "|032d6e8c20f41c18e...|      [product info]|       [2.329999975]|\n",
      "|0377ebcf2ac8f8aef...|      [product info]|[1.0699999999999998]|\n",
      "|03f484c3d0dc5afaf...|[homepage, produc...|[0.65000001000000...|\n",
      "|040828e6773148d00...|      [product info]|       [1.430000006]|\n",
      "|05bd9a73b6f61509e...|      [product info]|       [14.79999967]|\n",
      "|068ea915e886eb11c...|[homepage, others...|[0.5, 0.5, 0.9799...|\n",
      "|06a572f2d5e9d5c56...|      [product info]|              [0.56]|\n",
      "+--------------------+--------------------+--------------------+\n",
      "only showing top 10 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "df_embedding.select('user_id','journey_page_final','journey_time_final').show(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create the pandas DataFrame used for embedding.\n",
    "# toPandas() collects the whole Spark DataFrame into driver memory.\n",
    "pd_df_embedding = df_embedding.toPandas()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user_id</th>\n",
       "      <th>journey_page_final</th>\n",
       "      <th>journey_time_final</th>\n",
       "      <th>converted_emb</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>004e96d0dc01f2541b7e5be735da6321b15f797ded220d...</td>\n",
       "      <td>[product info]</td>\n",
       "      <td>[3.5533333000000002]</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>01158797281955155c5c6bbe7daaa368021adcc4eaf4b3...</td>\n",
       "      <td>[homepage, added to cart, homepage, added to c...</td>\n",
       "      <td>[0.43333334999999995, 0.11666667, 0.05, 0.0833...</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>020d29467c7810a85cdf9c85f8bad4c551bc79f5aee959...</td>\n",
       "      <td>[homepage, product info]</td>\n",
       "      <td>[0.25, 1.03333334]</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>032d6e8c20f41c18ec32b4178099e5ec6648715cb43953...</td>\n",
       "      <td>[product info]</td>\n",
       "      <td>[2.329999975]</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0377ebcf2ac8f8aef0f6dc28d61479f85a30d88203b9a4...</td>\n",
       "      <td>[product info]</td>\n",
       "      <td>[1.0699999999999998]</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                             user_id  \\\n",
       "0  004e96d0dc01f2541b7e5be735da6321b15f797ded220d...   \n",
       "1  01158797281955155c5c6bbe7daaa368021adcc4eaf4b3...   \n",
       "2  020d29467c7810a85cdf9c85f8bad4c551bc79f5aee959...   \n",
       "3  032d6e8c20f41c18ec32b4178099e5ec6648715cb43953...   \n",
       "4  0377ebcf2ac8f8aef0f6dc28d61479f85a30d88203b9a4...   \n",
       "\n",
       "                                  journey_page_final  \\\n",
       "0                                     [product info]   \n",
       "1  [homepage, added to cart, homepage, added to c...   \n",
       "2                           [homepage, product info]   \n",
       "3                                     [product info]   \n",
       "4                                     [product info]   \n",
       "\n",
       "                                  journey_time_final  converted_emb  \n",
       "0                               [3.5533333000000002]              0  \n",
       "1  [0.43333334999999995, 0.11666667, 0.05, 0.0833...              0  \n",
       "2                                 [0.25, 1.03333334]              0  \n",
       "3                                      [2.329999975]              0  \n",
       "4                               [1.0699999999999998]              0  "
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd_df_embedding.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [],
   "source": [
    "# keep only journeys whose length is strictly greater than 4\n",
    "# NOTE(review): the threshold is strict, so journeys of exactly 4 entries are dropped too - confirm this is intended\n",
    "if 'journey_length' in pd_df_embedding.columns:\n",
    "    journey_lengths = pd_df_embedding['journey_length']\n",
    "else:\n",
    "    # the frame previewed above has no journey_length column, so derive it from the page sequence\n",
    "    # (presumably journey_length counts the entries of journey_page_final - verify against the upstream cells)\n",
    "    journey_lengths = pd_df_embedding['journey_page_final'].map(len)\n",
    "pd_df_embedding = pd_df_embedding[journey_lengths > 4]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [],
   "source": [
    "# reset index\n",
    "pd_df_embedding = pd_df_embedding.reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [],
   "source": [
    "# train a Word2Vec model: each journey is one sentence, each page category one token\n",
    "EMBEDDING_SIZE = 100\n",
    "RANDOM_SEED = 1\n",
    "# an explicit seed plus a single worker thread removes thread-scheduling jitter between runs;\n",
    "# gensim additionally needs a fixed PYTHONHASHSEED for identical results across interpreter launches\n",
    "model = Word2Vec(\n",
    "    pd_df_embedding['journey_page_final'],\n",
    "    size=EMBEDDING_SIZE,\n",
    "    seed=RANDOM_SEED,\n",
    "    workers=1,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.7907839000254171"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.total_train_time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Word2Vec(vocab=7, size=100, alpha=0.025)\n"
     ]
    }
   ],
   "source": [
    "# summarize the trained model\n",
    "print(model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [],
   "source": [
    "# summarize vocabulary\n",
    "page_categories = list(model.wv.vocab)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['product info', 'homepage', 'added to cart', 'others', 'reviews', 'offers', 'buy']\n"
     ]
    }
   ],
   "source": [
    "# page categories \n",
    "print(page_categories)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[ 0.47489035  0.53834176 -0.47785276 -0.26488945 -0.2656599  -0.04322785\n",
      "  0.24362227  0.26430744 -0.48902759 -0.31393662  0.41263634  0.78189737\n",
      "  0.58100605 -0.4459599  -0.70117044 -0.63221812 -0.6908192  -0.6791628\n",
      " -0.02506013  0.21131983 -0.02721698 -0.20559131 -0.78862274 -0.55389541\n",
      " -0.1507041   0.7149269  -0.24301411  0.29431018 -0.52848756 -0.500494\n",
      "  0.16006927 -0.10355954 -0.36789769 -0.01349463 -0.40723842  0.15346751\n",
      " -0.79262614 -0.67456675 -0.18617149  0.69221032  0.53981733  0.75779319\n",
      "  0.0573662   0.85435468  0.78063792  0.57342744 -0.16319969  0.46502107\n",
      " -0.09518502  0.60525858  0.31979162 -0.26889852 -0.12189896  0.65022558\n",
      "  0.07857032  0.06138223  0.15626955  0.23680885  0.33999926 -0.54703128\n",
      " -0.21992962  0.83436728 -0.34557605 -0.69831383  0.4595826  -0.49346444\n",
      " -0.14114673  0.37797749  0.70894194  0.55426389 -0.40428343 -0.67311144\n",
      " -0.46010655 -0.44518954  0.7340765  -0.04775194 -0.44416061  0.45019379\n",
      " -0.54332632 -0.48565596  0.093257   -0.5141685   0.24856164  0.39611688\n",
      " -0.15698397 -0.45113751 -0.15056689  0.75211751 -0.06628865  0.07008368\n",
      "  0.46780539 -0.13114813 -0.61940897 -0.29163912 -0.3338908   0.40938324\n",
      "  0.08697812  0.74824899  0.53244144 -0.20717621]\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/conda/lib/python3.6/site-packages/ipykernel_launcher.py:2: DeprecationWarning: Call to deprecated `__getitem__` (Method will be removed in 4.0.0, use self.wv.__getitem__() instead).\n",
      "  \n"
     ]
    }
   ],
   "source": [
    "# sample embedding (read through model.wv; indexing the model object directly is deprecated in gensim)\n",
    "print(model.wv['reviews'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/conda/lib/python3.6/site-packages/ipykernel_launcher.py:2: DeprecationWarning: Call to deprecated `__getitem__` (Method will be removed in 4.0.0, use self.wv.__getitem__() instead).\n",
      "  \n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "(100,)"
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# embedding shape: a single vector of length EMBEDDING_SIZE (read through model.wv, not the deprecated model[...])\n",
    "model.wv['offers'].shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/conda/lib/python3.6/site-packages/ipykernel_launcher.py:2: DeprecationWarning: Call to deprecated `__getitem__` (Method will be removed in 4.0.0, use self.wv.__getitem__() instead).\n",
      "  \n"
     ]
    }
   ],
   "source": [
    "# embedding matrix: one row per entry of page_categories, stacked in that same order,\n",
    "# so row i of X is the vector for page_categories[i] (the plot labels rely on this alignment)\n",
    "X = model.wv[page_categories]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(7, 100)"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# embedding matrix shape\n",
    "X.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [],
   "source": [
    "# run PCA with 2 components to visualize the page-category embeddings\n",
    "pca = PCA(n_components=2)\n",
    "result = pca.fit_transform(X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAl8AAAJCCAYAAAD+96JYAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xt01PWd//HX24AaQYmVtAVEgvuzyCUXQriEyGWLggoiRBTrotCuoq2sWtucSq20uvaoG+ttxbq45aJQYeUSqZfGekEBEU0k3MSIWCwEV6IYmkC0JLx/fxCzIAkBMnwmE56Pc+Yw853PfD+fmaPt0+/3O4O5uwAAABDGCdFeAAAAwPGE+AIAAAiI+AIAAAiI+AIAAAiI+AIAAAiI+AIAAAiI+AIAAAiI+AIAAAiI+AIAAAioRbQXcCht27b1pKSkaC8DAACgQYWFhZ+5e2JD45p0fCUlJamgoCDaywAAAGiQmX18OOM47QgAABAQ8QUAABAQ8QUAABAQ8QUAABBQo+PLzDqa2WtmtsHM1pvZzXWMGWxmO82sqOY2pbHzAgAAxKJIfNuxStLP3P1dMztVUqGZ/cXd3/vGuKXuPiIC8wEAAMSsRh/5cvdP3P3dmvvlkjZI6tDY/QIAADRHEb3my8ySJPWUtLKOpzPNbLWZvWhm3Q+xj4lmVmBmBaWlpZFcHgAAQNRFLL7MrLWkBZJucfe/f+PpdyV1cvdUSf8pKa++/bj7NHfPcPeMxMQGfyQWAAAgpkQkvsyspfaF1xx3X/jN59397+5eUXP/BUktzaxtJOYGAACIJZH4tqNJ+oOkDe7+QD1jvlszTmbWp2bezxs7NwAAQKyJxLcdsyRdLWmtmRXVbPulpLMkyd0flzRG0o/NrEpSpaQr3d0jMDcAAEBMaXR8ufsySdbAmEclPdrYuQAAAGIdv3APAAAQEPEFAAAQEPEFAAAQEPEFAAAQEPEFAAAQEPEFAAAQEPEFAAAQEPEFAAAQUCR+4R5AE5e3qkS5+cXaVlap9gnxyhnWRaN6doj2sgDguER8Ac1c3qoSTV64VpV7qiVJJWWVmrxwrSQRYAAQBZx2BJq53Pzi2vD6WuWeauXmF0dpRQBwfCO+gCbmkUceUdeuXfUv//Iv+uqrr3T++ecrLS1N8+bNO6r9bSurPKLtAIBji9OOQBPz2GOP6cUXX1Tnzp311ltvac+ePSoqKjrs11dVValFi//7V7t9QrxK6git9gnxEVkvAODIEF9AFD3wwAOaPn26JOnaa6/V+++/r48++kgjR47UuHHj9MQTT6i0tFRpaWlasGCBysrKdOutt6qiokJt27bVzJkz1a5dOw0ePFj9+/fX8uXLNXLkSJ111lm68847FRcXp6q4eMUP/80Bpx7jW8YpZ1iXaL1tADiuEV9AlBQWFmrGjBlauXKl3F19+/bV7Nmz9ec//1mvvfaa2rZtq759++r+++/Xc889pz179ujqq6/Ws88+q8TERM2bN0+33357bbyVlZXp9ddflyQlJycrPz9fHTp0UFlZmZb8dRffdgSAJoL4AqJk2bJlGj16tFq1aiVJys7O1tKlS+sdX1xcrHXr1umCCy6QJFVXV6tdu3a1z48dO7b2flZWliZMmKArrrhC2dnZGtWzA7EFAE0E8QVEibsf8fju3btrxYoVdT7/dcRJ0uOPP66VK1fq+eefV1pamoqKinTGGWc0ar0AgMjg245AlAwcOFB5eXnavXu3du3apUWLFmnAgAH1ju/SpYtKS0tr42vPnj1av359nWM3bdqkvn376q677lLbtm21ZcuWY/IeAABHjiNfQJSkp6drwoQJ6tOnj6R9F9z37Nmz3vEnnnii5s+fr5tuukk7d+5UVVWVbrnlFnXv3v2gsTk5Odq4caPcXUOGDFFqauoxex8AgCNjR3rqI6SMjAwvKCiI9jIAAAAaZGaF7p7R0DhOOwIAAAREfAEAAAREfAEAAAREfAEAAAREfAEAAAREfAEAAAREfAEAAAREfAEAAAREfAEAAAREfAEAAAREfAEAAAREfAEA
AAREfAEAAAREfAEAAAREfAEAAAREfAEAAAREfAEAAAREfAEAAAREfAEAAAREfAEAAAREfAEAAAREfAEAAAREfAEAAAREfAEAAAREfAEAAAREfAEAAAREfAEAAAREfAEAAAREfAEAAAREfAEAAAREfAEAAAREfAEAAAREfAEAAAREfAEAAATU6Pgys45m9pqZbTCz9WZ2cx1jzMweMbMPzWyNmaU3dl4AAIBY1CIC+6iS9DN3f9fMTpVUaGZ/cff39htzkaRzam59Jf2+5k8AAIDjSqOPfLn7J+7+bs39ckkbJHX4xrBLJT3p+7wlKcHM2jV2bgAAgFgT0Wu+zCxJUk9JK7/xVAdJW/Z7vFUHBxoAAECzF7H4MrPWkhZIusXd//7Np+t4idezn4lmVmBmBaWlpZFaHgAAQJMQkfgys5baF15z3H1hHUO2Suq43+MzJW2ra1/uPs3dM9w9IzExMRLLAwAAaDIi8W1Hk/QHSRvc/YF6hi2WdE3Ntx77Sdrp7p80dm4AAIBYE4lvO2ZJulrSWjMrqtn2S0lnSZK7Py7pBUkXS/pQ0m5JP4zAvAAAADGn0fHl7stU9zVd+49xSTc2di4AAIBYxy/cAwAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABER8AQAABBSR+DKz6Wa23czW1fP8YDPbaWZFNbcpkZgXAAAg1rSI0H5mSnpU0pOHGLPU3UdEaD4AAICYFJEjX+7+hqQdkdgXAABAcxbymq9MM1ttZi+aWff6BpnZRDMrMLOC0tLSgMsDAAA49kLF17uSOrl7qqT/lJRX30B3n+buGe6ekZiYGGh5AAAAYQSJL3f/u7tX1Nx/QVJLM2sbYm4AAICmJEh8mdl3zcxq7vepmffzEHMDAAA0JRH5tqOZPS1psKS2ZrZV0q8ltZQkd39c0hhJPzazKkmVkq50d4/E3AAAALEkIvHl7j9o4PlHte+nKAAAAI5r/MI9AABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQBGJLzObbmbbzWxdPc+bmT1iZh+a2RozS4/EvAAAALEmUke+Zkq68BDPXyTpnJrbREm/j9C8AAAAMSUi8eXub0jacYghl0p60vd5S1KCmbWLxNwAAACxJNQ1Xx0kbdnv8daabQAAAMeVUPFldWzzOgeaTTSzAjMrKC0tPcbLAgAACCtUfG2V1HG/x2dK2lbXQHef
5u4Z7p6RmJgYZHEAAAChhIqvxZKuqfnWYz9JO939k0BzAwAANBktIrETM3ta0mBJbc1sq6RfS2opSe7+uKQXJF0s6UNJuyX9MBLzAkBzlLeqRLn5xdpWVqn2CfHKGdZFo3pymSzQXEQkvtz9Bw0875JujMRcANCc5a0q0eSFa1W5p1qSVFJWqckL10oSAQY0E/zCPQA0Ibn5xbXh9bXKPdXKzS+O0ooARBrxBaDZ2Lx5s3r06BHtZTTKtrLKI9oOIPYQXwDQhLRPiD+i7QBiD/EFoFmprq7Wddddp+7du2vo0KGqrKxUUVGR+vXrp5SUFI0ePVpffPGFJGnw4MH66U9/qoEDB6pr16565513lJ2drXPOOUe/+tWvavc5e/Zs9enTR2lpabr++utVXb3vtGDr1q31s5/9TOnp6RoyZIi+/m3CJ554Qr1791Zqaqouu+wy7d69W5K0adMm9evXT71799aUKVPUunXr2jlyc3PVu3dvlfzhRlW8+ccD3lN8yzjlDOtyTD83AOEQXwCalY0bN+rGG2/U+vXrlZCQoAULFuiaa67RfffdpzVr1ig5OVl33nln7fgTTzxRb7zxhm644QZdeumlmjp1qtatW6eZM2fq888/14YNGzRv3jwtX75cRUVFiouL05w5cyRJu3btUnp6ut59910NGjSodr/Z2dl65513tHr1anXt2lV/+MMfJEk333yzbr75Zr3zzjtq37597Rpeeuklbdy4UW+//bY+Kl6vJP9Urb/YKJPUISFe92Qnc7E90IxE5NuOANBUdO7cWWlpaZKkXr16adOmTSorK9OgQYMkSePHj9fll19eO37kyJGSpOTkZHXv3l3t2u37a2fPPvtsbdmyRcuWLVNhYaF69+4tSaqsrNS3v/1tSdIJJ5ygsWPHSpLGjRun7OxsSdK6dev0q1/9SmVlZaqoqNCwYcMkSStWrFBeXp4k6aqrrtLPf/5zSfvi66WXXlLPnj0lSRUVFZo84VT9678OP0afEoBoIr4ANCsnnXRS7f24uDiVlZUd1vgTTjjhgNeecMIJqqqqkrtr/Pjxuueeexqc22zf36Q2YcIE5eXlKTU1VTNnztSSJUsO+Tp31+TJk3X99dc3OAeA2MdpRwDNWps2bXT66adr6dKlkqSnnnqq9ijY4RgyZIjmz5+v7du3S5J27Nihjz/+WJK0d+9ezZ8/X5L0xz/+Ueedd54kqby8XO3atdOePXtqT1FKUr9+/bRgwQJJ0ty5c2u3Dxs2TNOnT1dFRYUkqaSkpHY+AM0PR74ANHuzZs3SDTfcoN27d+vss8/WjBkzDvu13bp10913362hQ4dq7969atmypaZOnapOnTqpVatWWr9+vXr16qU2bdpo3rx5kqR///d/V9++fdWpUyclJyervLxckvTQQw9p3Lhx+t3vfqfhw4erTZs2kqShQ4dqw4YNyszMlLTvQv7Zs2fXnt4E0LzYvh+fb5oyMjK8oKAg2ssAgDq1bt269mjV4di9e7fi4+NlZpo7d66efvppPfvss8dwhQBCMrNCd89oaBxHvgAgkMLCQk2aNEnuroSEBE2fPj3aSwIQBcQXABylIznqJUkDBgzQ6tWrj9FqAMQKLrgHAAAIiPgCAAAIiPgCAAAIiPgCAAAIiPgCAAAIiPgCAAAIiPgCAAAIiPgCAAAIiPgCAAAIiPgCAAAIiPgCAAAIiPgCAAAIiPgCAAAIiPgCAAAIiPgCAAAIqEW0FwAARyNvVYly84u1raxS7RPilTOsi0b17BDtZQFAg4gvADEnb1WJJi9cq8o91ZKkkrJKTV64VpIIMABNHqcdAcSc3Pzi2vD6WuWeauXmF0dpRQBw+IgvAPXavHmzevTocVSvXbJkid588806n1u8eLHuvffeBveRk5Oj7t27Kycn54Dt28oq6xxf33YAaEo47Qgch6qrqxUXF3dM51iyZIlat26t/v37H/TcyJEjNXLkyAb38V//9V8qLS3VSSed
dMD29gnxKqkjtNonxB/9ggEgEI58Ac3I5s2bde6552r8+PFKSUnRmDFjtHv3bklSUlKS7rrrLp133nl65plnVFRUpH79+iklJUWjR4/WF198IUkqLCxUamqqMjMzNXXq1Np9z5w5U5MmTap9PGLECC1ZskSS9Oc//1np6elKTU3VkCFDtHnzZj3++ON68MEHlZaWpqVLlx6wzv33NWHCBN10003q37+/zj77bM2fP1/SvkDbtWuX+vbtq3nz5unjjz/WkCFDlJKSovKFU9Ri9+cH7DO+ZZxyhnWJ7AcKAMcA8QU0M8XFxZo4caLWrFmj0047TY899ljtcyeffLKWLVumK6+8Utdcc43uu+8+rVmzRsnJybrzzjslST/84Q/1yCOPaMWKFYc1X2lpqa677jotWLBAq1ev1jPPPKOkpCTdcMMN+ulPf6qioiINGDDgkPv45JNPtGzZMj333HO67bbbJO07NRkfH6+ioiKNHTtWkyZN0jXXXKM1a9bolht+pDPW/lEdEuJlkjokxOue7GQutgcQE4gvoJnp2LGjsrKyJEnjxo3TsmXLap8bO3asJGnnzp0qKyvToEGDJEnjx4/XG2+8cdD2q6++usH53nrrLQ0cOFCdO3eWJH3rW9864jWPGjVKJ5xwgrp166ZPP/20zjErVqzQVVddVbuuTWsLtPy27+uv9w7X8tu+T3gBiBnEF9DMmFm9j1u1anXI17r7Qa//WosWLbR3797ax19++WWDrzlc+1/T5e6H9ZrGzgkA0UJ8Ac3M3/72t9pThk8//bTOO++8g8a0adNGp59+eu21WE899ZQGDRqkhIQEtWnTpvZo2Zw5c2pfk5SUpKKiIu3du1dbtmzR22+/LUnKzMzU66+/rr/+9a+SpB07dkiSTj31VJWXl0fsffXv319z586tXVdd7wsAYgHxBTQzXbt21axZs5SSkqIdO3boxz/+cZ3jZs2apZycHKWkpKioqEhTpkyRJM2YMUM33nijMjMzFR//f98ezMrKUufOnZWcnKyf//znSk9PlyQlJiZq2rRpys7OVmpqau2pzUsuuUSLFi2q84L7o/HII49oxowZSklJ0VNPPaWHH3640fsEgGiwwz3EHw0ZGRleUFAQ7WUAMWPz5s0aMWKE1q1bF+2lAMBxx8wK3T2joXEc+QIAAAiI+AKakaSkJI56AUATR3wBAAAERHwBAAAERHwBAAAERHwBAAAERHwBAAAERHwBAAAERHwBAAAERHwBAAAERHwBAAAERHwBAAAERHwBAAAERHwBAAAERHwBAAAERHwBAAAEFJH4MrMLzazYzD40s9vqeH6CmZWaWVHN7dpIzAsAABBrWjR2B2YWJ2mqpAskbZX0jpktdvf3vjF0nrtPaux8AAAAsSwSR776SPrQ3T9y939Imivp0gjsFwAAoNmJRHx1kLRlv8dba7Z902VmtsbM5ptZx/p2ZmYTzazAzApKS0sjsDwAAICmIxLxZXVs8288/pOkJHdPkfSypFn17czdp7l7hrtnJCYmRmB5AAAATUck4murpP2PZJ0padv+A9z9c3f/qubhE5J6RWBeAACAmBOJ+HpH0jlm1tnMTpR0paTF+w8ws3b7PRwpaUME5gUAAIg5jf62o7tXmdkkSfmS4iRNd/f1ZnaXpAJ3XyzpJjMbKalK0g5JExo7LwAAQCwy929entV0ZGRkeEFBQbSXAQAA0CAzK3T3jIbG8Qv3AAAAARFfAAAAARFfAAAAARFfAAAAARFfAAAAARFfAAAAARFfAAAAARFfAAAAARFfAAAAARFfAAAAARFfAAAAARFfAAAAARFfAAAAARFfAAAAARFfAAAAARFfAAAAARFfAAAAARFfAAAAARFfAAAAARFfAAAAARFfAAAAARFfAAAAARFfAAAAARFfAAAAARFfAAAAARFfAAAAARFfAAAAARFfAAAAARFfAAAAARFfAAAAARFfAAAAARFfAAAAARFfAAAAARFfAAAA
ARFfAAAAARFfAAAAARFfAAAAARFfAAAAARFfAAAAARFfAAAAARFfAAAAARFfAAAAARFfAAAAARFfAAAAARFfAAAAARFfAAAAARFfAAAAARFfAAAAARFfAAAAARFfAAAAARFfAAAAARFfAAAAARFfAAAAARFfAAAAAUUkvszsQjMrNrMPzey2Op4/yczm1Ty/0sySIjEvAABArGl0fJlZnKSpki6S1E3SD8ys2zeG/aukL9z9/0l6UNJ9jZ0XAAAgFkXiyFcfSR+6+0fu/g9JcyVd+o0xl0qaVXN/vqQhZmYRmBsAACCmRCK+Okjast/jrTXb6hzj7lWSdko6IwJzAwAAxJRIxFddR7D8KMbsG2g20cwKzKygtLS00YsDAABoSiIRX1slddzv8ZmSttU3xsxaSGojaUddO3P3ae6e4e4ZiYmJEVgeAABA0xGJ+HpH0jlm1tnMTpR0paTF3xizWNL4mvtjJL3q7nUe+QIAAGjOWjR2B+5eZWaTJOVLipM03d3Xm9ldkgrcfbGkP0h6ysw+1L4jXlc2dl4AAIBY1Oj4kiR3f0HSC9/YNmW/+19KujwScwEAAMQyfuEeAAAgIOILAAAgIOILAAAgIOILAAAgIOILAAAgIOILAAAgIOILAAAgIOILAAAgIOILAAAgIOILAAAgIOILAAAgIOILAAAgIOILAAAgIOILAAAgIOILAAAgIOILAAAgIOILAAAgIOILAAAgIOILAAAgIOILAAAgoBbRXgCAo5e3qkS5+cXaVlap9gnxyhnWRaN6doj2sgAAh0B8ATEqb1WJJi9cq8o91ZKkkrJKTV64VpIIMABowjjtCMSo3Pzi2vD6WuWeauXmF0dpRQCAw0F8AcdQWVmZHnvssdrHS5Ys0YgRIyKy721llUe0HQDQNBBfwDH0zfhqrKqqqtr77RPi6xxT33YAQNPANV9ABD3wwAOaPn26JOnaa6/VW2+9pU2bNiktLU0XXHCBhg8froqKCo0ZM0br1q1Tr169NHv2bJmZCgsLdeutt6qiokJt27bVzJkz1a5dOw0ePFj9+/fX8uXLNXLkSJ111lm68847tesfe7X9yzh9+6p7a+ePbxmnnGFdovX2AQCHgfgCIqSwsFAzZszQypUr5e7q27evZs+erXXr1qmoqEjSvtOOq1at0vr169W+fXtlZWVp+fLl6tu3r/7t3/5Nzz77rBITEzVv3jzdfvvttSFXVlam119/XZKUnJys/Px8dejQQXPeeE+Pvfm/fNsRAGII8QVEyLJlyzR69Gi1atVKkpSdna2lS5ceNK5Pnz4688wzJUlpaWnavHmzEhIStG7dOl1wwQWSpOrqarVr1672NWPHjq29n5WVpQkTJuiKK65Qdna2/mVgt2P5tgAAEUZ8ARHi7oc17qSTTqq9HxcXp6qqKrm7unfvrhUrVtT5mq+DTpIef/xxrVy5Us8//7zS0tJUVFSkM844o3GLBwAEwwX3QIQMHDhQeXl52r17t3bt2qVFixYpKytL5eXlDb62S5cuKi0trY2vPXv2aP369XWO3bRpk/r27au77rpLbdu21ZYtWyL6PgAAxxZHvoAISU9P14QJE9SnTx9J+y6479Wrl7KystSjRw9ddNFFGj58eJ2vPfHEEzV//nzddNNN2rlzp6qqqnTLLbeoe/fuB43NycnRxo0b5e4aMmSIUlNTj+n7AgBElh3uqZJoyMjI8IKCgmgvAwAAoEFmVujuGQ2N47QjAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQMQXAABAQI2KLzP7lpn9xcw21vx5ej3jqs2sqOa2uDFzAgAAxLLGHvm6TdIr7n6OpFdqHtel0t3Tam4jGzkn
AABAzGpsfF0qaVbN/VmSRjVyfwAAAM1aY+PrO+7+iSTV/PntesadbGYFZvaWmR0y0MxsYs3YgtLS0kYuDwAAoGlp0dAAM3tZ0nfreOr2I5jnLHffZmZnS3rVzNa6+6a6Brr7NEnTJCkjI8OPYA4AAIAmr8H4cvfz63vOzD41s3bu/omZtZO0vZ59bKv58yMzWyKpp6Q64wsAAKA5a+xpx8WSxtfcHy/p2W8OMLPTzeykmvttJWVJeq+R8wIAAMSkxsbXvZIuMLONki6oeSwzyzCz/64Z01VSgZmtlvSapHvdnfgCAADHpQZPOx6Ku38uaUgd2wskXVtz/01JyY2ZBwAAoLngF+4BAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACalR8mdnlZrbezPaaWcYhxl1oZsVm9qGZ3daYOdF85K0qUda9r6rzbc8r695XlbeqJNpLAgDgmGvska91krIlvVHfADOLkzRV0kWSukn6gZl1a+S8iHF5q0o0eeFalZRVyiWVlFVq8sK1BBgAoNlrVHy5+wZ3L25gWB9JH7r7R+7+D0lzJV3amHkR+3Lzi1W5p/qAbZV7qpWb39A/TgAAxLYQ13x1kLRlv8dba7ahATNnztSkSZPqfK5169ZHtK/f/OY3uv/++w+DJwj8AAAQSElEQVTanpeXp/fee++o1ne0HnroIW0t/aLO57aVVQZdCwAAoTUYX2b2spmtq+N2uEevrI5tfoj5JppZgZkVlJaWHuYUOFqh46u6uloPPfSQvnNK3f/otU+ID7YWAACiocH4cvfz3b1HHbdnD3OOrZI67vf4TEnbDjHfNHfPcPeMxMTEw5wi9owaNUq9evVS9+7dNW3atNrtM2bM0Pe+9z0NGjRIy5cvr93+17/+VZmZmerdu7fuuOOOA/aVm5ur3r17KyUlRb/+9a9rt//2t79Vly5ddP7556u4+ODTeW+++aYWL16snJwcpaWladOmTSoqKlK/fv2UkpKi0aNH64svDj5C9emnn2r06NFKTU1Vamqq3nzzzUO+p9atW2vKlCnq27evfvvb32rbtm36dO5klc795QH7jW8Zp5xhXY7wkwQAIMa4e6NvkpZIyqjnuRaSPpLUWdKJklZL6n44++3Vq5c3V59//rm7u+/evdu7d+/un332mW/bts07duzo27dv96+++sr79+/vN954o7u7X3LJJT5r1ix3d3/00Ue9VatW7u6en5/v1113ne/du9erq6t9+PDh/vrrr3tBQYH36NHDd+3a5Tt37vR/+qd/8tzc3IPWMX78eH/mmWdqHycnJ/uSJUvc3f2OO+7wm2+++aDXXHHFFf7ggw+6u3tVVZWXlZXV+57c3SX5vHnzal/fqVMnLy0t9UXvbvX+97ziSb94zvvf84ovendrIz5RAACiS1KBH0bftGhMuJnZaEn/KSlR0vNmVuTuw8ysvaT/dveL3b3KzCZJypcUJ2m6u69vzLzNwSOPPKJFixZJkrZs2aKNGzfqf//3fzV48GB9fcRv7Nix+uCDDyRJy5cv14IFCyRJV199tX7xi19Ikl566SW99NJL6tmzpySpoqJCGzduVHl5uUaPHq1TTjlFkjRy
5MgG17Rz506VlZVp0KBBkqTx48fr8ssvP2jcq6++qieffFKSFBcXpzZt2tT7ns444wzFxcXpsssuO2g/o3p20KieXP4HADi+NCq+3H2RpEV1bN8m6eL9Hr8g6YXGzNWcLFmyRC+//LJWrFihU045RYMHD9aXX34pSTKr6xI51fucu2vy5Mm6/vrrD9j+0EMPHXJfkXao93TyyScrLi4u2FoAAGjK+IX7KNi5c6dOP/10nXLKKXr//ff11ltvSZL69u2rJUuW6PPPP9eePXv0zDPP1L4mKytLc+fOlSTNmTOndvuwYcM0ffp0VVRUSJJKSkq0fft2DRw4UIsWLVJlZaXKy8v1pz/9qc61nHrqqSovL5cktWnTRqeffrqWLl0qSXrqqadqj4Ltb8iQIfr9738vad8F9H//+9/rfU8NzQkAwPGG+IqCCy+8UFVVVUpJSdEdd9yhfv36SZLatWun3/zmN8rMzNT555+v9PT02tc8/PDDmjp1qnr37q2dO3fWbh86dKiuuuoqZWZmKjk5WWPGjFF5ebnS09M1duxYpaWl6bLLLtOAAQPqXMuVV16p3Nxc9ezZU5s2bdKsWbOUk5OjlJQUFRUVacqUKQe95uGHH9Zrr72m5ORk9erVS+vXr6/3PdVl4sSJuuiii/TP//zPR/sRAgAQs2zf9WFNU0ZGhhcUFER7GQAAAA0ys0J3r/evW/waR74AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACahHtBURL3qoS5eYXa1tZpdonxCtnWBeN6tkh2ssCAADN3HEZX3mrSjR54VpV7qmWJJWUVWrywrWSRIABAIBj6rg87ZibX1wbXl+r3FOt3PziKK0IAAAcL47L+NpWVnlE2wEAACIlpuNr8+bN6tGjxxG/rn1C/BFtBwAAiJSYjq+jlTOsi+Jbxh2wLb5lnHKGdYnSigAAwPEi5uOrqqpK48ePV0pKisaMGaPdu3crKSlJn332mSSpoKBAgwcP1t69e3XOOeeotLRUo3p20G9HddenT0zU3t071SEhXvdkJ3OxPQAAOOZiPr6Ki4s1ceJErVmzRqeddpoee+yxOsedcMIJGjdunObMmSNJav35Bl08uJ/+9shVWn7b9wkvAAAQRMzHV8eOHZWVlSVJGjdunJYtW1bv2B/96Ed68sknJUnTp0/XD3/4wyBrBAAA+FrMx5eZHfS4RYsW2rt3ryTpyy+/rH2uY8eO+s53vqNXX31VK1eu1EUXXRR0rQAAADEfX3/729+0YsUKSdLTTz+t8847T0lJSSosLJQkLViw4IDx1157rcaNG6crrrhCcXFxB+0PAADgWIr5+OratatmzZqllJQU7dixQz/+8Y/161//WjfffLMGDBhwUGCNHDlSFRUVnHIEAABREdN/vVBSUpLee++9g7YPGDBAH3zwQZ2vWb16tVJTU3Xuuece6+UBAAAcJKbj60jde++9+v3vf1/7jUcAAIDQzN2jvYZ6ZWRkeEFBQbSXAQAA0CAzK3T3jIbGxfw1XwAAALGE+AIAAAiI+AIAAAiI+AIAAAiI+AIAAAiI+AIAAAiI+AIAAAioUfFlZpeb2Xoz22tm9f6uhZltNrO1ZlZkZvxwFwAAOG419hfu10nKlvRfhzH2n939s0bOBwAAENMaFV/uvkGSzCwyqwEAAIiQvFUlys0v1raySrVPiFfOsC4a1bNDtJcV7Jovl/SSmRWa2cRAcwIAgONU3qoSTV64ViVllXJJJWWVmrxwrfJWlUR7aQ3Hl5m9bGbr6rhdegTzZLl7uqSLJN1oZgMPMd9E
Mysws4LS0tIjmAIAAGCf3PxiVe6pPmBb5Z5q5eYXR2lF/6fB047ufn5jJ3H3bTV/bjezRZL6SHqjnrHTJE2T9v3F2o2dGwAAHH+2lVUe0faQjvlpRzNrZWanfn1f0lDtu1AfAACg1sUXX6yysrKI7Kt9QvwRbQ+psT81MdrMtkrKlPS8meXXbG9vZi/UDPuOpGVmtlrS25Ked/c/N2ZeAADQtLm79u7de0SveeGFF5SQkBCR+XOGdVF8y7gDtsW3jFPOsC4R2X9jNCq+3H2Ru5/p7ie5+3fcfVjN9m3ufnHN/Y/cPbXm1t3dfxuJhQMAgKZl8+bN6tq1q37yk58oPT1dTz31lDIzM5Wenq7LL79cFRUVevHFF3XFFVfUvmbJkiW65JJLJElJSUn67LN9v0o1e/Zs9enTR2lpabr++utVXV2t//mf/9Gtt94qSXr44Yd19tlnS5I2bdqk8847T5J02223qVu3bpoy/iJ1+ThPHRLiZZI6JMTrnuzkJvFtx8b+zhcAAECt4uJizZgxQ3fddZeys7P18ssvq1WrVrrvvvv0wAMP6Je//KWuv/567dq1S61atdK8efM0duzYA/axYcMGzZs3T8uXL1fLli31k5/8RHPmzNHQoUOVm5srSVq6dKnOOOMMlZSUaNmyZRowYIB27NihRYsW6f3335eZqaysLGJH0iKJ+AIAABHTqVMn9evXT88995zee+89ZWVlSZL+8Y9/KDMzUy1atNCFF16oP/3pTxozZoyef/55/cd//McB+3jllVdUWFio3r17S5IqKyv17W9/W9/97ndVUVGh8vJybdmyRVdddZXeeOMNLV26VNnZ2TrttNN08skn69prr9Xw4cM1YsSI4O//cBBfAAAgYlq1aiVp3zVfF1xwgZ5++umDxowdO1ZTp07Vt771LfXu3VunnnrqAc+7u8aPH6977rnnoNdmZmZqxowZ6tKliwYMGKDp06drxYoV+t3vfqcWLVro7bff1iuvvKK5c+fq0Ucf1auvvnps3mgj8BdrAwCAiOvXr5+WL1+uDz/8UJK0e/duffDBB5KkwYMH691339UTTzxx0ClHSRoyZIjmz5+v7du3S5J27Nihjz/+WJI0cOBA3X///Ro4cKB69uyp1157TSeddJLatGmjiooK7dy5UxdffLEeeughFRUVBXq3R4YjXwAAIOISExM1c+ZM/eAHP9BXX30lSbr77rv1ve99T3FxcRoxYoRmzpypWbNmHfTabt266e6779bQoUO1d+9etWzZUlOnTlWnTp00YMAAbdmyRQMHDlRcXJw6duyoc889V5JUXl6uSy+9VF9++aXcXQ8++GDQ93y4zL3p/o5pRkaGFxQURHsZAAAADTKzQnfPaGgcpx0BAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACMneP9hrqZWalkj6O9jpiRFtJn0V7Ec0An2Pk8FlGDp9l5PBZRg6f5cE6uXtiQ4OadHzh8JlZgbtnRHsdsY7PMXL4LCOHzzJy+Cwjh8/y6HHaEQAAICDiCwAAICDiq/mYFu0FNBN8jpHDZxk5fJaRw2cZOXyWR4lrvgAAAALiyBcAAEBAxFczYWa5Zva+ma0xs0VmlhDtNcUqM7vczNab2V4z45s8R8HMLjSzYjP70Mxui/Z6YpWZTTez7Wa2LtpriWVm1tHMXjOzDTX/bt8c7TXFKjM72czeNrPVNZ/lndFeUywivpqPv0jq4e4pkj6QNDnK64ll6yRlS3oj2guJRWYWJ2mqpIskdZP0AzPrFt1VxayZki6M9iKagSpJP3P3rpL6SbqRfyaP2leSvu/uqZLSJF1oZv2ivKaYQ3w1E+7+krtX1Tx8S9KZ0VxPLHP3De5eHO11xLA+kj5094/c/R+S5kq6NMpriknu/oak
HdFeR6xz90/c/d2a++WSNkjqEN1VxSbfp6LmYcuaGxePHyHiq3n6kaQXo70IHLc6SNqy3+Ot4v/o0ESYWZKknpJWRnclscvM4sysSNJ2SX9xdz7LI9Qi2gvA4TOzlyV9t46nbnf3Z2vG3K59h9jnhFxbrDmczxJHzerYxn8ZI+rMrLWkBZJucfe/R3s9scrdqyWl1VxbvMjMerg71yUeAeIrhrj7+Yd63szGSxohaYjzGyKH1NBniUbZKqnjfo/PlLQtSmsBJElm1lL7wmuOuy+M9nqaA3cvM7Ml2nddIvF1BDjt2EyY2YWSfiFppLvvjvZ6cFx7R9I5ZtbZzE6UdKWkxVFeE45jZmaS/iBpg7s/EO31xDIzS/z62/RmFi/pfEnvR3dVsYf4aj4elXSqpL+YWZGZPR7tBcUqMxttZlslZUp63szyo72mWFLzxY9JkvK178Lm/3H39dFdVWwys6clrZDUxcy2mtm/RntNMSpL0tWSvl/zv49FZnZxtBcVo9pJes3M1mjff2j9xd2fi/KaYg6/cA8AABAQR74AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAACIr4AAAAC+v833oTPE2XiAAAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x7f6122426b38>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# scatter the 2-D PCA projection and label every point with its page category\n",
    "plt.figure(figsize=(10, 10))\n",
    "plt.scatter(result[:, 0], result[:, 1])\n",
    "\n",
    "# row idx of result is annotated with page_categories[idx]\n",
    "for idx in range(len(page_categories)):\n",
    "    point = (result[idx, 0], result[idx, 1])\n",
    "    plt.annotate(\n",
    "        page_categories[idx],\n",
    "        xy=point,\n",
    "        horizontalalignment='right',\n",
    "        verticalalignment='top',\n",
    "    )\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
